{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Using `perspective.Table`\n",
    "This notebook illustrates the concepts and usage of `perspective.Table`, Perspective's core component that allows for lightning-fast data loading, update, and transformation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from perspective import Table\n",
    "from datetime import date, datetime\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Perspective supports 6 core data types: \n",
    "- `int`\n",
    "- `float`\n",
    "- `str`\n",
    "- `bool`\n",
    "- `datetime`\n",
    "- `date`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = {\n",
    "    \"int\": [i for i in range(4)],\n",
    "    \"float\": [i * 1.25 for i in range(4)],\n",
    "    \"str\": [\"a\", \"b\", \"c\", \"d\"],\n",
    "    \"bool\": [True, False, True, False],\n",
    "    \"date\": [date.today() for i in range(4)],\n",
    "    \"datetime\": [datetime.now() for i in range(4)]\n",
    "}\n",
    "\n",
    "# data can be in row format\n",
    "rows = [{\"a\": 1, \"b\": True}, {\"a\": 2, \"b\": False}]\n",
    "\n",
    "# contain numpy arrays\n",
    "npdata = {\n",
    "    \"a\": np.arange(0, 2),\n",
    "    \"b\": np.array([\"a\", \"b\"], dtype=object),\n",
    "    \"nullable\": np.full(2, np.nan), # perspective handles `None` and `np.nan` values\n",
    "    \"mixed\": [None, 1]\n",
    "}\n",
    "\n",
    "# or pandas DataFrames\n",
    "df = pd.DataFrame(npdata)\n",
    "\n",
    "# or a mixture:\n",
    "mixed = {\n",
    "    \"a\": np.arange(100),\n",
    "    \"b\": [\"str\" for i in range(100)],\n",
    "    \"c\": [None for i in range(100)],\n",
    "    \"d\": np.full(100, np.nan),\n",
    "    \"e\": [datetime.now() for i in range(100)]\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Perspective schemas can be used as an explicit type mapping for columns - if a schema isn't provided on `__init__`, types will be inferred."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "schema = {\n",
    "    \"int\": float,\n",
    "    \"float\": int,\n",
    "    \"str\": str,\n",
    "    \"bool\": bool,\n",
    "    \"date\": datetime,\n",
    "    \"datetime\": datetime\n",
    "}\n",
    "\n",
    "# types can be specified using Python types or string representations\n",
    "schema2 = {\n",
    "    \"int\": \"integer\",\n",
    "    \"float\": \"float\",\n",
    "    \"str\": \"string\",\n",
    "    \"bool\": \"boolean\",\n",
    "    \"date\": \"date\",\n",
    "    \"datetime\": \"datetime\"\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a `perspective.Table` by passing in a dataset or a schema."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "table = Table(data)\n",
    "\n",
    "# tables can be created from schema\n",
    "table2 = Table(schema)\n",
    "assert table2.size() == 0\n",
    "\n",
    "# constructing a table with an index, which is a column name to be used as the primary key\n",
    "indexed = Table(data, index=\"str\")\n",
    "\n",
    "# or a limit, which is a total cap on the number of rows in the table - updates past `limit` overwite at row 0\n",
    "limited = Table(data, limit=2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Tables have their own metadata:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"Table has {} rows\".format(table.size()))\n",
    "print(\"Table columns:\", table.columns())\n",
    "print(\"Table schema:\", table.schema())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`update(data)` can be called on the table instance."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# you can update all columns\n",
    "table.update(data)\n",
    "print(\"after update:\", table.size())\n",
    "\n",
    "# or however many you'd like\n",
    "table.update({\n",
    "    \"int\": [5, 6, 7],\n",
    "    \"str\": [\"x\", \"y\", \"z\"]\n",
    "})\n",
    "\n",
    "# but you cannot add new columns through updating - create a new Table instead\n",
    "try:\n",
    "    table.update({\n",
    "        \"abcd\": [1]\n",
    "    })\n",
    "except:\n",
    "    pass\n",
    "\n",
    "# updates on unindexed tables always append\n",
    "print(\"after append:\", table.size())\n",
    "\n",
    "# updates on indexed tables should include the primary key - the new data overwrites at the row specified by the primary key\n",
    "indexed.update([{\"str\": \"b\", \"int\": 100}])\n",
    "print(\"after indexed partial update:\", indexed.size())\n",
    "\n",
    "# without a primary key, the update appends to the end of the dataset\n",
    "indexed.update([{\"int\": 101}])\n",
    "print(\"after indexed append:\", indexed.size())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`remove([pkeys])` can also be called **on indexed tables** - when provided with an array of primary keys, Perspective removes the rows at those keys."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "indexed.remove([\"a\", \"b\"])\n",
    "print(\"after remove:\", indexed.size())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Creating and using Views\n",
    "A `View` represents an immutable set of transformations on the `perspective.Table`.\n",
    "\n",
    "##### _Create a view using `table.view()`_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "view = table.view() # a view with zero transformations - returns the dataset as passed in\n",
    "\n",
    "# view metadata\n",
    "print(\"View has {} rows and {} columns\".format(view.num_rows(), view.num_columns()))\n",
    "print(view.schema())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### _Apply transformations to it_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pivoted = table.view(row_pivots=[\"int\"], column_pivots=[\"str\"]) # group and split the underlying dataset\n",
    "\n",
    "aggregated = table.view(row_pivots=[\"int\"], aggregates={\"float\": \"avg\"}) # specify aggregations for individual columns\n",
    "\n",
    "subset = table.view(columns=[\"float\"]) # show only the columns you're interested in\n",
    "\n",
    "sorted_view = table.view(sort=[[\"str\", \"desc\"], [\"int\", \"asc\"]]) # sort on a specific column, or multiple columns\n",
    "\n",
    "filtered = table.view(filter=[[\"int\", \">\", 2]]) # filter the dataset on a specific value"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "_Output serialized data from the view_\n",
    "\n",
    "Perspective outputs data in several formats:\n",
    "- `to_records`: outputs a list of dictionaries, each of which is a single row\n",
    "- `to_dict`: outputs a dictionary of lists, each string key the name of a column\n",
    "- `to_numpy`: outputs a dictionary of numpy arrays\n",
    "- `to_df`: outputs a `pandas.DataFrame`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rows = view.to_records()\n",
    "\n",
    "columnar = view.to_dict()\n",
    "\n",
    "np_out = view.to_numpy()\n",
    "\n",
    "df_out = view.to_df()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Data from pivoted or otherwise transformed views reflect the state of the transformed dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "filtered_df = filtered.to_df()\n",
    "filtered_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`update()` on the underlying table propagate to views."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "v1 = table.view()\n",
    "v2 = table.view(row_pivots=[\"int\"])\n",
    "print(\"v1 has {} rows and {} columns\".format(v1.num_rows(), v1.num_columns()))\n",
    "print(\"v2 has {} rows and {} columns\".format(v2.num_rows(), v2.num_columns()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "table.update({\"int\": [100, 200, 300, 400]})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"v1 has {} rows and {} columns\".format(v1.num_rows(), v1.num_columns()))\n",
    "print(\"v2 has {} rows and {} columns\".format(v2.num_rows(), v2.num_columns()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Callbacks\n",
    "\n",
    "Callback functions can be set on both the `Table` and the `View` instances.\n",
    "\n",
    "- `View.on_update`: a callback that will be fired on the specified view instance after its underlying table completes processing of updated data. \n",
    "    * multiple `on_update` callbacks can be set, and they will be fired in the order in which they were set.\n",
    "- `View.on_delete`: a callback that will be fired when the view's `delete()` method is called, either manually or by the GC.\n",
    "    * multiple `on_delete` callbacks can be set.\n",
    "- `Table.on_delete`: a callback that will be fired when the table's `delete()` method is called, either manually or by the GC.\n",
    "    * multiple `on_delete` callbacks can be set."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# define a simple callback\n",
    "update_counter = 0\n",
    "def callback(port_id):\n",
    "    global update_counter\n",
    "    update_counter += 1\n",
    "\n",
    "new_table = Table(data)\n",
    "v3 = new_table.view()\n",
    "v3.on_update(callback)\n",
    "for i in range(5):\n",
    "    new_table.update(data)\n",
    "\n",
    "print(\"The table was updated {} times\".format(update_counter))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create an on_delete callback\n",
    "def deleted():\n",
    "    print(\"Finished deleting.\")\n",
    "new_table.on_delete(deleted)\n",
    "v3.on_delete(deleted)\n",
    "v3.delete()\n",
    "new_table.delete()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Callbacks can be removed using the `remove_update` or `remove_delete` methods, both of which take a function to remove."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "counter = 0\n",
    "\n",
    "def update_callback(port_id):\n",
    "    global counter\n",
    "    counter += 1\n",
    "    \n",
    "def delete_callback():\n",
    "    print(\"DELETED\")\n",
    "\n",
    "table4 = Table(data)\n",
    "view1 = table4.view()\n",
    "\n",
    "view1.on_update(update_callback)\n",
    "view1.on_delete(delete_callback)\n",
    "\n",
    "for i in range(5):\n",
    "    table4.update(data)\n",
    "    \n",
    "# Remove the update callback\n",
    "view1.remove_update(update_callback)\n",
    "\n",
    "for i in range(5):\n",
    "    table4.update(data) # should no longer trigger callback\n",
    "    \n",
    "print(\"The table was updated {} times\".format(counter))\n",
    "\n",
    "# Removes the delete callback\n",
    "view1.remove_delete(delete_callback)\n",
    "view1.delete() # should not call the callback"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
